ultimate_summary_table <- read.csv("/restricted/projectnb/agedisease/projects/pancancer_aging_clocks/scripts/GitCore/results/Ultimate_summary_table.csv", row.names = NULL)
reactable(
ultimate_summary_table %>%
mutate(across(where(~is.numeric(.) && any(. %% 1 != 0, na.rm = TRUE)),
~signif(., digits = 3))),
searchable = TRUE
)
ultimate_summary_table$Rsquared <- as.numeric(ultimate_summary_table$Rsquared)
ultimate_summary_table$features <- as.numeric(ultimate_summary_table$features)
ultimate_summary_table$zero_weight_features <- as.numeric(ultimate_summary_table$zero_weight_features)
ultimate_summary_table$non_zero_weight_features <- as.numeric(ultimate_summary_table$non_zero_weight_features)
# Remove NA values
plot_data <- na.omit(ultimate_summary_table)
plot_data <- plot_data[order(plot_data$features), ]
barplot(
height = plot_data$Rsquared, # Y-axis (R-squared values)
names.arg = plot_data$features,
col = "steelblue", # Bar color
border = NA,
xlab = "Number of Features",
ylab = "R-Squared",
main = "R-Squared vs Number of Features",
ylim = c(0, 1), # Set correct y-axis range
las = 1 # Make y-axis labels horizontal for readability
)
axis(2, at = seq(0, 1, by = 0.1), las = 1)
plot_data <- plot_data[order(plot_data$zero_weight_features), ]
barplot(
height = plot_data$Rsquared, # Y-axis (R-squared values)
names.arg = plot_data$zero_weight_features,
col = "firebrick", # Bar color
border = NA,
xlab = "Number of Features",
ylab = "R-Squared",
main = "R-Squared vs Number of Zero_weight_Features",
ylim = c(0, 1), # Set correct y-axis range
las = 1 # Make y-axis labels horizontal for readability
)
axis(2, at = seq(0, 1, by = 0.1), las = 1)
plot_data <- plot_data[order(plot_data$non_zero_weight_features), ]
barplot(
height = plot_data$Rsquared, # Y-axis (R-squared values)
names.arg = plot_data$non_zero_weight_features,
col = "forestgreen", # Bar color
border = NA,
xlab = "Number of Features",
ylab = "R-Squared",
main = "R-Squared vs Number of Non_zero_weight_Features",
ylim = c(0, 1), # Set correct y-axis range
las = 1 # Make y-axis labels horizontal for readability
)
axis(2, at = seq(0, 1, by = 0.1), las = 1)
## Baseline Models per Cancer
# Get unique cancer types
cancer_types <- unique(ultimate_summary_table$cancer_type)
# Define the threshold line
threshold <- -log10(0.05)
# Loop through each cancer type and create a bar plot
for (cancer in cancer_types) {
subset <- subset(ultimate_summary_table, cancer_type == cancer)
# Transform p-values
subset$log_pval <- -log10(subset$chronological_pval_baseline)
p <- ggplot(subset, aes(x = layer_combination, y = log_pval)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_hline(yintercept = threshold, linetype = "dotted", color = "red", size = 1) +
labs(title = paste("Chronological P-Value Baseline (-log10) for", cancer),
x = "Layer Combination",
y = "-log10(Chronological P-Value Baseline)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p)
}
## Heatmaps
REMOVED BRCA BECAUSE IT HAS NO DATA YET —->>> … Heatmap ordered from lowest to highest baseline chronological pval on survival, anything to the left of KICH, including KICH is has chronological age as a significant contributor of survival
##########Defining column and row orders####################
#row order as complexity increases & column order as least to most age related at the baseline survival model
r_order <- c("miRNA","RPPA","RNAseq","methylation","RNAseq_RPPA","methylation_RNAseq","methylation_miRNA", "methylation_RPPA","miRNA_RNAseq","miRNA_RPPA", "miRNA_RNAseq_RPPA", "methylation_RNAseq_RPPA","methylation_miRNA_RNAseq","methylation_miRNA_RPPA", "methylation_miRNA_RNAseq_RPPA")
# Compute median chronological_pval_baseline for each cancer type and order it
median_pvals <- aggregate(chronological_pval_baseline ~ cancer_type,
data = ultimate_summary_table,
FUN = median)
# Order from lowest to highest
median_pvals <- median_pvals[order(median_pvals$chronological_pval_baseline), ]
c_order <- median_pvals$cancer_type
##############################################################
# Ensure the columns are numeric and remove NA values
ultimate_summary_table <- ultimate_summary_table %>%
mutate(delta_age_pval_non_interaction = as.numeric(delta_age_pval_non_interaction)) %>%
replace_na(list(delta_age_pval_non_interaction = 1))
# Standardize layer_combinations by sorting and collapsing duplicate pairs
ultimate_summary_table <- ultimate_summary_table %>%
mutate(layer_combination = sapply(strsplit(layer_combination, "_"), function(x) paste(sort(x), collapse = "_"))) %>%
group_by(cancer_type, layer_combination) %>%
summarise(delta_age_pval_non_interaction = mean(delta_age_pval_non_interaction, na.rm = TRUE), .groups = "drop")
# Convert the data to a matrix for the heatmap
heatmap_data <- ultimate_summary_table %>%
select(cancer_type, layer_combination, delta_age_pval_non_interaction) %>%
pivot_wider(names_from = cancer_type, values_from = delta_age_pval_non_interaction, values_fn = mean, values_fill = 1) %>%
column_to_rownames(var = "layer_combination")
min(heatmap_data) #8.232179e-11
## [1] 8.232179e-11
max(heatmap_data) #1
## [1] 1
heatmap_data <- data.frame(lapply(heatmap_data, as.numeric), row.names = rownames(heatmap_data))
heatmap_data <- as.matrix(heatmap_data)
# Apply row and column ordering
heatmap_data <- heatmap_data[r_order,]
c_order <- setdiff(c_order, "BRCA") #################TAKE OUT WHEN BRCA DONE :) ###########################
heatmap_data <- heatmap_data[,c_order]
# Define color scale
col_fun <- colorRamp2(c(min(heatmap_data, na.rm = TRUE),
0.05,
max(heatmap_data, na.rm = TRUE)),
c("red", "#f16913","white"))
#c("white","#fee6ce","#fdae6b", "#f16913", "#d94801", "#7f2704")
#Generate a Heatmap
Heatmap(heatmap_data, col = col_fun,
layer_fun = function(j, i, x, y, width, height, fill) {
grid.rect(x, y, width, height, gp = gpar(col = NA, fill = fill))
if("KICH" %in% colnames(heatmap_data)) {
kich_index <- which(colnames(heatmap_data) == "KICH") / ncol(heatmap_data)
grid.lines(x = unit(rep(kich_index, 2), "npc"),
y = unit(c(0, 1), "npc"),
gp = gpar(lty = "dotted", col = "black", lwd = 2))
}
},
top_annotation = HeatmapAnnotation(
annotation_custom = function(index) {
grid::grid.lines(x = unit(which(colnames(heatmap_data) == "KICH") / ncol(heatmap_data), "npc"),
y = unit(c(0, 1), "npc"),
gp = grid::gpar(lty = "dotted", col = "black"))
}
),
name = "Delta Age P-Value",
cluster_rows = FALSE,
cluster_columns = FALSE,
row_names_gp = gpar(fontsize = 8),
column_names_gp = gpar(fontsize = 8),
heatmap_legend_param = list(
title = "P-Value",
legend_height = unit(3, "cm"),
direction = "horizontal"
))
#Number of features and model performance (r^2) ===> Visualize the impact of number of features with model performance
#-log(Pval) and model performance (r^2) ===> Visualize the impact of fit of age with survival model performance